{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from keras.preprocessing import sequence\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Embedding, Dropout\n",
    "from keras.layers import LSTM\n",
    "from keras.datasets import imdb\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "from bs4 import BeautifulSoup\n",
    "import re\n",
    "from keras.preprocessing.text import Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#(x_train, y_train), (x_test, y_test) = imdb.load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                              review sentiment\n",
      "0  One of the other reviewers has mentioned that ...  positive\n",
      "1  A wonderful little production. <br /><br />The...  positive\n",
      "2  I thought this was a wonderful way to spend ti...  positive\n",
      "3  Basically there's a family where a little boy ...  negative\n",
      "4  Petter Mattei's \"Love in the Time of Money\" is...  positive\n"
     ]
    }
   ],
   "source": [
    "# Load the labelled reviews; the CSV provides 'review' and 'sentiment' columns.\n",
    "dataset = pd.read_csv(r'movie_reviews.csv')\n",
    "print(dataset.head())\n",
    "\n",
    "# Pull both columns out as parallel NumPy arrays (texts and their labels).\n",
    "reviews, sentiments = (\n",
    "    np.array(dataset[column]) for column in ('review', 'sentiment')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def strip_html_tags(text):\n",
    "    \"\"\"Return the text content of ``text`` with HTML markup removed.\n",
    "\n",
    "    The input is parsed with BeautifulSoup using the ``html.parser`` backend\n",
    "    and the parsed document's text is returned via ``get_text()``.\n",
    "    \"\"\"\n",
    "    return BeautifulSoup(text, \"html.parser\").get_text()\n",
    "\n",
    "# Normalise every review: drop HTML markup, lower-case, trim surrounding\n",
    "# whitespace, then delete every character that is not an ASCII letter,\n",
    "# digit or whitespace.\n",
    "#\n",
    "# The character class uses A-Z (not A-z): the range A-z also covers the\n",
    "# ASCII characters [ \\ ] ^ _ ` that sit between 'Z' and 'a', so they were\n",
    "# silently kept. The pattern is a raw string so that \\s reaches the regex\n",
    "# engine without triggering Python's invalid-escape-sequence warning.\n",
    "clean_docs = []\n",
    "\n",
    "for doc in reviews:\n",
    "    doc = strip_html_tags(doc)\n",
    "    doc = doc.lower().strip()\n",
    "    doc = re.sub(r'[^a-zA-Z0-9\\s]', '', doc)\n",
    "    clean_docs.append(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Tokenizer configured with a 10,000-word limit, splitting tokens on single spaces.\n",
    "tokenizer = Tokenizer(\n",
    "    num_words=10000,\n",
    "    split=' ',\n",
    ")\n",
    "\n",
    "# Fit the tokenizer on the cleaned reviews.\n",
    "tokenizer.fit_on_texts(clean_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'kuttram': 88006,\n",
       " 'payola': 56325,\n",
       " 'viscousif': 161324,\n",
       " 'sheso': 128104,\n",
       " 'vsexy': 206912,\n",
       " 'michalka': 48912,\n",
       " 'intended': 1331,\n",
       " 'rally': 12221,\n",
       " 'coprology': 92641,\n",
       " 'businesses': 17527,\n",
       " 'caveshokey': 200779,\n",
       " 'pieomys': 140773,\n",
       " 'wallah': 44909,\n",
       " 'beginnig': 154141,\n",
       " 'onceto': 200875,\n",
       " 'scenography': 59031,\n",
       " 'storynow': 61116,\n",
       " 'khoo': 136733,\n",
       " 'peopleprudes': 122060,\n",
       " 'dangerfieldi': 150247,\n",
       " 'abbad': 162652,\n",
       " 'pointcounterpoint': 153619,\n",
       " 'houndside': 183681,\n",
       " 'aarika': 73428,\n",
       " 'ushpizin': 167708,\n",
       " 'filmcontrary': 151610,\n",
       " 'goodwin': 42771,\n",
       " 'allthis': 15806,\n",
       " 'monstersontherampage': 160875,\n",
       " 'newell': 36639,\n",
       " 'frequencies': 79149,\n",
       " 'batjacproduction': 107750,\n",
       " 'iswas': 21911,\n",
       " 'bloodtaking': 129314,\n",
       " 'whammo': 128803,\n",
       " 'failsthen': 100142,\n",
       " 'directiongrade': 144701,\n",
       " 'rahims': 214038,\n",
       " 'ramzi': 71792,\n",
       " 'momentsfor': 111870,\n",
       " 'pavloskys': 208507,\n",
       " 'singlemalt': 183751,\n",
       " 'zanzibar': 120214,\n",
       " 'httpowlypttp': 142080,\n",
       " 'spyl': 94961,\n",
       " 'flatulence': 24471,\n",
       " '30th': 21731,\n",
       " 'sidneyeven': 209922,\n",
       " 'francs': 27098,\n",
       " 'legendtoday': 169917,\n",
       " 'noninvolvement': 64840,\n",
       " 'bonsais': 170995,\n",
       " 'late70searly': 107569,\n",
       " 'kulak': 174180,\n",
       " 'timeshowever': 203314,\n",
       " 'jumpthat': 222011,\n",
       " 'eightiesand': 96822,\n",
       " 'streetbut': 128247,\n",
       " 'fiat': 135418,\n",
       " 'nutshelllight': 124059,\n",
       " 'clouzot': 15880,\n",
       " 'complaints3': 220515,\n",
       " 'mikesthe': 92420,\n",
       " 'timevery': 55435,\n",
       " 'out': 43,\n",
       " 'sketching': 36912,\n",
       " 'cercueils': 88588,\n",
       " 'undeservingly': 156688,\n",
       " 'film1010': 62892,\n",
       " 'riegerthis': 137156,\n",
       " 'irisingthere': 106569,\n",
       " 'childrenwhenever': 110268,\n",
       " 'fanglys': 35965,\n",
       " 'theslimypoliticians': 198752,\n",
       " 'winner': 2454,\n",
       " 'lowbudgetundergroundexperimental': 120516,\n",
       " 'bourneslick': 107542,\n",
       " 'cameraseverything': 120370,\n",
       " 'mangler': 42486,\n",
       " 'locationed': 177067,\n",
       " 'nut9510': 89195,\n",
       " 'offspoiler': 175870,\n",
       " 'robison': 51767,\n",
       " 'animes': 21847,\n",
       " 'outamitabh': 150136,\n",
       " 'indeeddont': 144890,\n",
       " 'dishwashing': 81195,\n",
       " 'sylvie': 29392,\n",
       " 'dramaturgy': 29559,\n",
       " 'roughbut': 152902,\n",
       " 'mindalthough': 71303,\n",
       " 'fantasys': 55022,\n",
       " 'supposedtrue': 154190,\n",
       " 'fiveo': 57197,\n",
       " 'holiest': 172690,\n",
       " 'audiencebut': 62108,\n",
       " 'paysit': 219600,\n",
       " 'janethere': 139805,\n",
       " 'niobe': 89475,\n",
       " 'munter': 68482,\n",
       " 'momentsall': 80487,\n",
       " 'unmasks': 58295,\n",
       " 'knifewhich': 171293,\n",
       " 'chalte': 68435,\n",
       " 'vampireits': 64412,\n",
       " 'fourthwallbusting': 217146,\n",
       " 'halfif': 96244,\n",
       " 'hazer': 118347,\n",
       " 'disadvantages': 26807,\n",
       " 'audley': 76479,\n",
       " 'corpsethe': 161819,\n",
       " 'bernini': 67324,\n",
       " 'imagesi': 66020,\n",
       " 'badlystaged': 191380,\n",
       " 'with9': 205843,\n",
       " 'said3': 141998,\n",
       " 'galiena': 162497,\n",
       " 'accordingly': 13076,\n",
       " '1887': 133718,\n",
       " 'aroundone': 74350,\n",
       " '1928s': 163279,\n",
       " 'sadeqi': 156401,\n",
       " 'balrog': 43934,\n",
       " 'beholdwhile': 196624,\n",
       " 'flynns': 11008,\n",
       " 'herovillain': 100562,\n",
       " 'eithergilmore': 192248,\n",
       " 'logicthis': 181231,\n",
       " 'inspector': 3491,\n",
       " '1975s': 59291,\n",
       " 'arp': 77413,\n",
       " 'boggle': 43918,\n",
       " 'sadi': 82260,\n",
       " 'yvelines': 30787,\n",
       " 'waif': 23156,\n",
       " 'radicalsi': 68454,\n",
       " 'serious': 575,\n",
       " 'mcgraws': 54470,\n",
       " 'friendend': 117743,\n",
       " 'connectivity': 136601,\n",
       " 'pseudonyms': 63281,\n",
       " 'makingand': 169185,\n",
       " 'directedmy': 142927,\n",
       " 'hitsone': 183972,\n",
       " 'determination': 6704,\n",
       " 'priveghi': 73282,\n",
       " 'donorwolf': 79733,\n",
       " 'barryrichard': 106039,\n",
       " 'statesnow': 204131,\n",
       " 'mrinal': 51186,\n",
       " 'afterhowever': 87142,\n",
       " 'lazily': 23727,\n",
       " 'webalthough': 170751,\n",
       " 'battlegroundthe': 90523,\n",
       " 'pairing': 7776,\n",
       " 'brisco': 48692,\n",
       " 'sordie': 84465,\n",
       " 'chalk': 11494,\n",
       " 'hypocrisya': 217240,\n",
       " 'clubkids': 105027,\n",
       " 'appearancethis': 132325,\n",
       " 'nostalgiafirst': 209049,\n",
       " 'manis': 69964,\n",
       " 'rentingbuyingwatching': 85643,\n",
       " 'armsgreat': 188146,\n",
       " 'overintellectualize': 179761,\n",
       " 'rangerit': 75188,\n",
       " 'bumsmy': 70317,\n",
       " 'hangin': 44529,\n",
       " 'ryderall': 152732,\n",
       " 'cultfilm': 191737,\n",
       " 'chivo': 42708,\n",
       " 'idealists': 47846,\n",
       " 'kz': 140177,\n",
       " 'nondescript': 14361,\n",
       " 'comensurate': 198465,\n",
       " 'gaga': 38210,\n",
       " 'subparobvious': 202551,\n",
       " 'brooma': 110296,\n",
       " 'whalehugger': 85509,\n",
       " 'doyes': 154177,\n",
       " 'guessits': 132678,\n",
       " 'kamaljothika': 87993,\n",
       " 'amrish': 30513,\n",
       " 'stalones': 115340,\n",
       " 'bullitt': 16364,\n",
       " 'pictureif': 74299,\n",
       " 'rowdy': 15937,\n",
       " 'fairytale': 9136,\n",
       " 'comedyspoof': 47894,\n",
       " 'jpc': 127979,\n",
       " '1grade': 212573,\n",
       " 'lloydpack': 161793,\n",
       " 'site': 2063,\n",
       " 'grouping': 43294,\n",
       " 'untalent': 130343,\n",
       " 'grassroots': 38198,\n",
       " 'goateed': 54829,\n",
       " 'annos': 144888,\n",
       " 'keitaro': 28109,\n",
       " 'timeisnt': 119603,\n",
       " 'nopeople': 165538,\n",
       " 'deserveso': 196440,\n",
       " 'bands': 3996,\n",
       " 'farnsworths': 36136,\n",
       " 'mussolini': 17547,\n",
       " 'earththis': 205837,\n",
       " 'doall': 210968,\n",
       " 'rotating': 20174,\n",
       " 'younggoon': 168251,\n",
       " 'preorders': 95539,\n",
       " 'consolations': 204026,\n",
       " 'lessthanten': 193893,\n",
       " 'embankment': 78479,\n",
       " 'fanclub': 108074,\n",
       " 'ironhead': 50034,\n",
       " 'adolescents': 15134,\n",
       " 'pycho': 132637,\n",
       " 'involved1': 89080,\n",
       " 'hennessey': 39788,\n",
       " 'digitized': 48478,\n",
       " 'lunas': 54566,\n",
       " 'cokesnorting': 54257,\n",
       " 'trades': 17012,\n",
       " 'breckinridge': 18336,\n",
       " 'muckerji': 60561,\n",
       " 'curiousity': 38260,\n",
       " 'juxtaposing': 24859,\n",
       " 'worest': 154278,\n",
       " 'collector': 8207,\n",
       " 'yakkity': 159946,\n",
       " 'tomnot': 97325,\n",
       " 'xu': 30764,\n",
       " 'misfortunate': 169719,\n",
       " 'prescottmarshall': 183148,\n",
       " 'languagei': 38957,\n",
       " 'unlikabledownsidenot': 195911,\n",
       " 'stockler': 36677,\n",
       " 'garloupis': 96580,\n",
       " 'finkkeith': 182690,\n",
       " 'exhusbandexwife': 166305,\n",
       " 'lemon': 12682,\n",
       " 'nielsens': 44746,\n",
       " 'harels': 145793,\n",
       " 'scarfaced': 57722,\n",
       " 'oneself': 11144,\n",
       " 'lowtheyre': 144390,\n",
       " 'unprofessionalism': 150358,\n",
       " '4th': 6018,\n",
       " 'wifetony': 168131,\n",
       " 'teamed': 9222,\n",
       " 'shipscrew': 96482,\n",
       " 'euphoria': 43284,\n",
       " 'didit': 47927,\n",
       " 'stationrather': 96017,\n",
       " 'migratory': 207667,\n",
       " 'cheapcheap': 111825,\n",
       " 'gradethe': 44510,\n",
       " 'nostalgic': 4646,\n",
       " 'hotchickinpanties': 95341,\n",
       " 'filmographymegalomania': 175603,\n",
       " 'linton': 175266,\n",
       " 'where': 112,\n",
       " 'affairscoronation': 108294,\n",
       " 'unscrupulous': 13230,\n",
       " 'wellevenly': 105622,\n",
       " 'ashwini': 194088,\n",
       " 'daydreamingif': 174069,\n",
       " 'gauthamthe': 191198,\n",
       " 'situationacting': 114771,\n",
       " 'mst3kavoid': 92530,\n",
       " 'stingonly': 196230,\n",
       " 'ayin': 139087,\n",
       " 'teamthe': 34039,\n",
       " 'retrained': 120756,\n",
       " 'ducktails2': 215809,\n",
       " 'uninterestingwe': 105202,\n",
       " 'greuesome': 95181,\n",
       " 'subtracted': 34444,\n",
       " 'thirdthe': 74296,\n",
       " 'funnybecause': 87808,\n",
       " 'inspectorelectrocution': 144110,\n",
       " 'laughing': 1017,\n",
       " 'murdereras': 145924,\n",
       " 'fathergetting': 167672,\n",
       " 'goingthis': 110945,\n",
       " 'oftdone': 194541,\n",
       " 'unforgettable': 3193,\n",
       " 'sanitizing': 86220,\n",
       " 'them2': 56682,\n",
       " 'highsociety': 53447,\n",
       " 'pesta': 192967,\n",
       " 'allpresiding': 175500,\n",
       " 'intune': 135567,\n",
       " 'lvres': 50657,\n",
       " 'graveall': 183022,\n",
       " 'devise': 17701,\n",
       " 'midninetieswhile': 133076,\n",
       " 'defection': 49165,\n",
       " 'personalitiesdown': 191727,\n",
       " 'petr': 48457,\n",
       " 'lovedmatt': 139451,\n",
       " 'longhardjourney': 142969,\n",
       " 'mindbogglingly': 30952,\n",
       " 'memore': 190699,\n",
       " 'misdemeanors': 47754,\n",
       " 'banshees': 57799,\n",
       " 'ideologies': 17895,\n",
       " 'operates': 12521,\n",
       " 'harnesses': 82729,\n",
       " 'levelso': 111483,\n",
       " 'umcompromising': 86676,\n",
       " 'overboiled': 136784,\n",
       " 'breathtakingi': 100751,\n",
       " 'doctor': 883,\n",
       " 'scissorsto': 211298,\n",
       " '21st': 5807,\n",
       " 'op78': 188124,\n",
       " 'flatly': 21149,\n",
       " 'nightmarefrom': 80890,\n",
       " 'partsjean': 152265,\n",
       " 'manhattanbased': 102774,\n",
       " 'connivance': 73831,\n",
       " 'windworshipers': 82991,\n",
       " 'agreeif': 133824,\n",
       " 'apartthe': 73993,\n",
       " 'productionno': 185314,\n",
       " 'aiello': 13028,\n",
       " 'writerswhere': 122602,\n",
       " 'endwith': 42080,\n",
       " 'marriageminded': 70706,\n",
       " 'birol': 179171,\n",
       " 'horsetraders': 169472,\n",
       " 'basisif': 192768,\n",
       " 'creampie': 92978,\n",
       " 'victimizing': 46463,\n",
       " 'except\\x85': 145846,\n",
       " 'roomusing': 196689,\n",
       " 'plied': 64864,\n",
       " 'removal': 16366,\n",
       " 'scenewhat': 80790,\n",
       " 'storyboarddirector': 94323,\n",
       " 'quess': 77689,\n",
       " 'detest': 25779,\n",
       " 'skillfully': 9135,\n",
       " 'nosdam': 116964,\n",
       " 'glitchy': 93188,\n",
       " 'kidsheres': 160756,\n",
       " 'continentsjewish': 123375,\n",
       " 'ctrlc': 130254,\n",
       " 'signedpetloveritual': 135603,\n",
       " 'yettocome': 155590,\n",
       " 'followon': 60493,\n",
       " 'secretos': 183347,\n",
       " 'whoattacked': 152421,\n",
       " 'midwaysurya': 136071,\n",
       " 'anybodyto': 162439,\n",
       " 'mundanegentleselfconsciously': 179786,\n",
       " 'akki': 35838,\n",
       " 'smileso': 81678,\n",
       " 'arise': 9739,\n",
       " 'deflector': 74626,\n",
       " 'selfacceptance': 200221,\n",
       " 'nye': 47654,\n",
       " 'therejess': 172309,\n",
       " 'goose': 12064,\n",
       " 'piste': 85034,\n",
       " 'immeasurablythe': 148387,\n",
       " 'dtent': 167668,\n",
       " 'rightman': 153326,\n",
       " 'ga': 28114,\n",
       " 'chappelle': 19912,\n",
       " 'hig': 149905,\n",
       " 'roaringsquawking': 101069,\n",
       " 'discussing': 5605,\n",
       " 'downandout': 31828,\n",
       " 'contemptible': 24566,\n",
       " 'pictorially': 60219,\n",
       " 'reunions': 47195,\n",
       " 'gagshowever': 85976,\n",
       " 'herberts': 33128,\n",
       " 'characterdevelopmentdenouemententire': 92911,\n",
       " 'matterof': 152905,\n",
       " 'mosesat': 184608,\n",
       " 'gumppeter': 178521,\n",
       " 'readi': 73434,\n",
       " 'excomrades': 81198,\n",
       " 'uninspiredi': 203684,\n",
       " 'inreign': 207195,\n",
       " 'burgade': 22718,\n",
       " 'opined': 42019,\n",
       " 'deletion': 62426,\n",
       " 'sakrileg': 142960,\n",
       " 'end8': 185540,\n",
       " 'thank': 1378,\n",
       " 'duvallthe': 186452,\n",
       " 'dismembermentmaybe': 170727,\n",
       " 'christmasrelated': 162040,\n",
       " 'badacted': 45358,\n",
       " 'screensmore': 131285,\n",
       " 'foist': 32930,\n",
       " 'thinkingthrough': 92037,\n",
       " 'candied': 121769,\n",
       " 'planetout': 199985,\n",
       " 'horned': 34841,\n",
       " 'governmentdepartmentalthough': 164951,\n",
       " 'codenamedragonfly': 71846,\n",
       " 'tvwife': 171819,\n",
       " 'cornfieldthis': 139479,\n",
       " 'helli': 51290,\n",
       " 'schoolchiasmus': 135901,\n",
       " 'benvictor': 79273,\n",
       " 'adultsthis': 48394,\n",
       " 'resultdecent': 147623,\n",
       " 'www1tvru': 176197,\n",
       " 'alltooaverage': 126416,\n",
       " 'sizzlingas': 165404,\n",
       " 'boosters': 63686,\n",
       " 'hairi': 54457,\n",
       " 'happenstances': 196637,\n",
       " 'scientistplastic': 184256,\n",
       " 'climaxas': 75022,\n",
       " 'matkondar': 138884,\n",
       " 'elisabeths': 72744,\n",
       " 'usersbut': 113357,\n",
       " 'yardsticks': 129204,\n",
       " 'madhousemade': 135084,\n",
       " 'threateningthe': 72317,\n",
       " 'radiovoice': 165264,\n",
       " 'albany': 51892,\n",
       " 'freaked': 9676,\n",
       " 'hamster': 24866,\n",
       " 'crotchhugging': 126392,\n",
       " 'rodriguezdesperado': 193336,\n",
       " 'janitor': 10157,\n",
       " 'rounding': 16758,\n",
       " 'againbravo': 92374,\n",
       " 'screensjack': 215848,\n",
       " 'sequencesexotic': 114948,\n",
       " 'drzhivago': 130383,\n",
       " 'mitchumwhos': 137988,\n",
       " 'outisnt': 113618,\n",
       " 'healy': 15988,\n",
       " 'emperor': 4444,\n",
       " 'chauvelins': 208048,\n",
       " 'wayah': 128151,\n",
       " 'grabs': 5386,\n",
       " 'mcintyres': 49601,\n",
       " 'thrillerssadly': 169079,\n",
       " 'beginsmy': 204102,\n",
       " 'thisoverall': 54813,\n",
       " 'momentssejin': 218646,\n",
       " 'sentencetalk': 163902,\n",
       " 'cancelled': 8431,\n",
       " 'marsalis': 79245,\n",
       " 'renna': 139148,\n",
       " 'ambitiouscrap': 133162,\n",
       " 'arehairspray': 205414,\n",
       " 'feud': 11980,\n",
       " 'meteorites': 72295,\n",
       " 'really4': 170389,\n",
       " 'runneresque': 167819,\n",
       " 'salena': 25460,\n",
       " 'televisionsubjects': 178667,\n",
       " 'gallaghers': 45442,\n",
       " 'reue': 173363,\n",
       " 'antique': 15511,\n",
       " 'corleones': 31489,\n",
       " 'warbles': 62929,\n",
       " 'thourghly': 157667,\n",
       " 'dayle': 210027,\n",
       " 'sceneshutton': 200504,\n",
       " 'politicallythemed': 201110,\n",
       " 'interpretations': 7999,\n",
       " 'frugality': 191322,\n",
       " 'simplified4': 188346,\n",
       " 'filmsenter': 95806,\n",
       " 'mafioso': 22607,\n",
       " 'mentiontheres': 163417,\n",
       " 'ifwhen': 96199,\n",
       " 'forensic': 15358,\n",
       " 'bowlers': 80822,\n",
       " 'yessusan': 130425,\n",
       " 'knowsome': 117940,\n",
       " 'bleibtreau': 151992,\n",
       " 'leclerc': 47344,\n",
       " 'aurvaag': 33018,\n",
       " 'sbd': 180010,\n",
       " 'hella': 54416,\n",
       " 'typein': 215591,\n",
       " 'crapbatman': 84068,\n",
       " 'tres': 42991,\n",
       " 'suvs': 43669,\n",
       " 'wantssome': 189033,\n",
       " 'resumesi': 93733,\n",
       " 'alljackie': 167164,\n",
       " 'sappylinemaking': 139069,\n",
       " 'aliveif': 186896,\n",
       " 'burnett': 18237,\n",
       " 'lajja2001': 149698,\n",
       " 'cairoso': 207171,\n",
       " 'adulthood': 10770,\n",
       " 'larryjoe76': 136569,\n",
       " 'teasedthe': 168197,\n",
       " 'marple': 30139,\n",
       " 'plotall': 50085,\n",
       " 'recherch': 89323,\n",
       " 'roughies': 57536,\n",
       " 'areconfirmed': 185433,\n",
       " 'peppoire': 74059,\n",
       " 'curly': 6431,\n",
       " 'materialand': 67590,\n",
       " 'workmanlike': 18010,\n",
       " 'journalism': 12967,\n",
       " 'pressureconformity': 127875,\n",
       " 'howeverand': 79160,\n",
       " 'overstylized': 33797,\n",
       " 'lauterthe': 187850,\n",
       " '13thclone': 144076,\n",
       " 'bogstandard': 43640,\n",
       " 'youthfully': 221332,\n",
       " 'mixedfabric': 221993,\n",
       " 'worthwhilebut': 212396,\n",
       " 'raymarthere': 149530,\n",
       " 'gilscott': 153262,\n",
       " '10292006': 191347,\n",
       " 'warholsthe': 157188,\n",
       " 'cringesuccessful': 189212,\n",
       " 'republicanismnineties': 196016,\n",
       " 'bulllike': 188296,\n",
       " 'krasinski': 26648,\n",
       " 'eitherannette': 141256,\n",
       " 'wallonia': 184710,\n",
       " 'mighta': 133899,\n",
       " 'excavations': 187228,\n",
       " 'nonconformism': 177504,\n",
       " 'metajokes': 219403,\n",
       " 'ifyoure': 131855,\n",
       " 'muddid': 170501,\n",
       " 'lunchmeat': 186056,\n",
       " 'sssr': 210548,\n",
       " 'lolit': 51110,\n",
       " 'otherits': 169624,\n",
       " 'gromitfan': 181614,\n",
       " 'entertainmentafter': 162664,\n",
       " 'piquant': 185336,\n",
       " 'regularity': 38181,\n",
       " 'incautos': 60102,\n",
       " 'caparoula': 172186,\n",
       " '8all': 191764,\n",
       " 'ej': 52198,\n",
       " 'spits': 12649,\n",
       " 'funalong': 182494,\n",
       " 'directionset': 41310,\n",
       " 'thingsbushwhacked': 190630,\n",
       " 'monsterthe': 38918,\n",
       " 'postfact': 181443,\n",
       " 'realif': 58484,\n",
       " 'welldisplayed': 178890,\n",
       " 'igniminiously': 113343,\n",
       " 'sidewindows': 122804,\n",
       " 'griffins': 46025,\n",
       " 'amply': 25511,\n",
       " 'websterjimmy': 156626,\n",
       " 'hitmanthere': 89058,\n",
       " 'listenbig': 122730,\n",
       " 'sledding': 42607,\n",
       " 'couture': 59616,\n",
       " 'response': 3407,\n",
       " 'stresses': 17264,\n",
       " 'jeremys': 34900,\n",
       " 'kojo': 132764,\n",
       " 'fuccon': 169057,\n",
       " 'lovecraftwhich': 138863,\n",
       " 'chooses': 4983,\n",
       " 'thismindblowingly': 87699,\n",
       " 'roundtable': 195924,\n",
       " 'colagrandes': 45983,\n",
       " 'rage': 3976,\n",
       " 'holiday': 3113,\n",
       " 'charactersalthough': 144716,\n",
       " 'planethow': 133654,\n",
       " 'bergens': 61223,\n",
       " 'claudia': 12559,\n",
       " 'highestrated': 90487,\n",
       " 'solving': 9173,\n",
       " 'magnums': 67567,\n",
       " 'payoff': 6115,\n",
       " 'mcgangs': 171564,\n",
       " 'apostle': 33603,\n",
       " 'alcoholdrugs': 186393,\n",
       " 'effectanyway': 151205,\n",
       " 'albatrosss': 176723,\n",
       " 'charendoff': 103339,\n",
       " 'lowestcommondenominator': 66131,\n",
       " 'armatures': 74775,\n",
       " 'braccos': 122747,\n",
       " 'doesntif': 156370,\n",
       " 'balletic': 185326,\n",
       " 'uncontrollably': 20564,\n",
       " 'fistycuffs': 187651,\n",
       " 'panache': 13226,\n",
       " 'becomesits': 153357,\n",
       " 'meeropol': 70617,\n",
       " 'ethiers': 73496,\n",
       " 'thinkmunchie': 127569,\n",
       " 'untill': 26563,\n",
       " 'persuading': 31406,\n",
       " 'missionjustice': 167711,\n",
       " 'blankeyed': 152590,\n",
       " 'presson': 38338,\n",
       " 'ethnographic': 164403,\n",
       " 'derns': 46010,\n",
       " 'highestquality': 179201,\n",
       " 'eliminationjoe': 189158,\n",
       " 'harebrained': 57211,\n",
       " 'hardcores': 206758,\n",
       " 'childhoodthere': 69070,\n",
       " 'atin': 151533,\n",
       " 'jackaass': 98214,\n",
       " 'dopers': 50235,\n",
       " 'howweird': 177429,\n",
       " 'pixiedust': 102081,\n",
       " 'eviction': 47375,\n",
       " 'happeningit': 90028,\n",
       " 'playedand': 96795,\n",
       " 'jordana': 20913,\n",
       " 'clos': 188484,\n",
       " 'motivational': 24208,\n",
       " 'hehehe': 54019,\n",
       " 'macfarlane': 47274,\n",
       " 'fourth': 2861,\n",
       " 'speilburg': 189661,\n",
       " 'pteradactyl': 85025,\n",
       " 'shill': 34469,\n",
       " 'prettier': 16886,\n",
       " 'pasqal': 121373,\n",
       " '1959well': 205875,\n",
       " 'booingwhy': 144272,\n",
       " 'headdoctor': 93373,\n",
       " 'stupidits': 40269,\n",
       " 'wellexcellent': 208786,\n",
       " 'pictureone': 76221,\n",
       " 'goodthree': 101690,\n",
       " 'bayits': 110694,\n",
       " 'teethgnashing': 63010,\n",
       " 'tvok': 160505,\n",
       " 'threeethis': 218750,\n",
       " 'yeiksto': 109357,\n",
       " 'asianlike': 124408,\n",
       " 'nina': 7491,\n",
       " 'civiliansto': 91055,\n",
       " 'roses62': 222170,\n",
       " 'hmmmare': 120543,\n",
       " 'thrilleravoid': 141629,\n",
       " 'bruises': 24229,\n",
       " 'entertainerers': 146611,\n",
       " 'horrendouslywe': 93936,\n",
       " 'pocheas': 170241,\n",
       " 'interviewsnothing': 134568,\n",
       " 'filmby': 33008,\n",
       " 'avenuebut': 97031,\n",
       " 'endureand': 185534,\n",
       " 'wagonmaster': 32134,\n",
       " 'disapears': 222105,\n",
       " 'madelaine': 83076,\n",
       " 'vaudevillesque': 214472,\n",
       " 'gimenez': 85799,\n",
       " 'mercado': 83335,\n",
       " 'mcgarrett': 87139,\n",
       " 'metro': 14091,\n",
       " 'entertainerwhat': 130412,\n",
       " 'laconian': 144804,\n",
       " 'persecutes': 119890,\n",
       " 'racisms': 210367,\n",
       " 'durbervilles': 103696,\n",
       " 'disappointedhopefully': 169419,\n",
       " 'funnist': 185102,\n",
       " 'guideline': 51314,\n",
       " 'thoughtdr': 138024,\n",
       " 'arnt': 36486,\n",
       " 'gawdy': 121072,\n",
       " 'anywayfirst': 64718,\n",
       " 'stevedean': 104353,\n",
       " 'uponand': 88572,\n",
       " 'dated': 1994,\n",
       " 'restgood': 180625,\n",
       " 'prichardrichard': 212458,\n",
       " 'armitages': 188782,\n",
       " 'ridefans': 89037,\n",
       " 'penal': 22677,\n",
       " 'matchwhile': 160725,\n",
       " 'rey': 23619,\n",
       " 'breathtop': 182315,\n",
       " 'agentoh': 142322,\n",
       " 'forgivenessbad': 213308,\n",
       " 'wonderfullythe': 140460,\n",
       " 'dalek': 24086,\n",
       " 'beganlindy': 165785,\n",
       " 'anbthony': 113888,\n",
       " 'awei': 61431,\n",
       " 'raghupati': 132371,\n",
       " 'buttonholes': 89670,\n",
       " 'himherself': 28687,\n",
       " 'vampiremovie': 67570,\n",
       " 'tridevplot': 181362,\n",
       " 'capitaes': 160443,\n",
       " 'hidesonly': 115628,\n",
       " 'recenter': 139481,\n",
       " 'midperformancei': 85804,\n",
       " 'riccardo': 23306,\n",
       " 'tristram': 48248,\n",
       " 'paved': 16261,\n",
       " 'fuyumi': 71852,\n",
       " 'corners': 10121,\n",
       " 'pta': 28441,\n",
       " 'v1': 151767,\n",
       " 'sabretooth': 15521,\n",
       " 'sinatra': 3048,\n",
       " 'quasigoofy': 178732,\n",
       " 'islandbut1': 120927,\n",
       " 'moviesmiles': 134655,\n",
       " 'catlover': 143838,\n",
       " 'everly': 46134,\n",
       " 'echance': 203659,\n",
       " 'scroogedit': 208109,\n",
       " 'analyzed': 15430,\n",
       " 'selfaffirmation': 183228,\n",
       " 'khialdi': 190093,\n",
       " 'coursethis': 48466,\n",
       " 'scriptwise': 36178,\n",
       " 'dancingive': 159058,\n",
       " 'worldalonethis': 121514,\n",
       " 'ogle': 32868,\n",
       " 'malevolent': 11421,\n",
       " 'allard': 55143,\n",
       " 'spoilersit': 67881,\n",
       " 'admirersin': 217326,\n",
       " 'realismthe': 32816,\n",
       " 'baitz': 124969,\n",
       " 'class': 806,\n",
       " 'lousiest': 29697,\n",
       " 'slurantwone': 191650,\n",
       " 'tamper': 38513,\n",
       " 'itgood': 55888,\n",
       " 'sprogin': 167494,\n",
       " 'genreof': 96691,\n",
       " 'fascinates': 25401,\n",
       " 'graphicsnot': 165807,\n",
       " 'everlovely': 175237,\n",
       " 'xiao': 26619,\n",
       " 'uninformed': 20831,\n",
       " 'hellbent': 20992,\n",
       " 'oooohhhscary': 100696,\n",
       " 'bharathi': 137277,\n",
       " 'fcc': 46026,\n",
       " 'turtlenecks': 80827,\n",
       " 'santiniurbano': 211299,\n",
       " 'ignominiously': 151298,\n",
       " 'mrsavage': 217556,\n",
       " 'diversioni': 83552,\n",
       " 'discreet': 17894,\n",
       " 'terrific': 1280,\n",
       " 'northfield': 60642,\n",
       " 'wicket': 143514,\n",
       " 'postsaloon': 124647,\n",
       " 'influencepolanski': 89933,\n",
       " 'torturerape': 193284,\n",
       " 'lofranco': 143987,\n",
       " 'watchable': 1707,\n",
       " 'ageafter': 215185,\n",
       " 'tigron': 200778,\n",
       " 'biographers': 63091,\n",
       " 'envoked': 188271,\n",
       " 'nicethere': 108552,\n",
       " 'dreamsa': 187974,\n",
       " 'colethe': 106559,\n",
       " 'exorcism': 9130,\n",
       " 'premchand': 59890,\n",
       " 'synthesize': 46426,\n",
       " 'movieall': 21587,\n",
       " 'characterone': 46057,\n",
       " 'rightness': 40206,\n",
       " 'expelledmiddle': 117742,\n",
       " 'nonexistence': 41107,\n",
       " 'mcnichols': 103579,\n",
       " 'carlise': 100226,\n",
       " 'seungup': 47139,\n",
       " 'burgess': 11454,\n",
       " 'commentboth': 141252,\n",
       " 'runteldat': 46049,\n",
       " 'tiesfrom': 130807,\n",
       " 'denseand': 184816,\n",
       " 'boozedup': 184278,\n",
       " 'talkingboring': 189318,\n",
       " 'knowns': 60429,\n",
       " 'foundered': 151359,\n",
       " 'pukesif': 176470,\n",
       " 'rioters': 54379,\n",
       " 'spendrating': 165229,\n",
       " 'schmid': 21870,\n",
       " 'denominated': 70218,\n",
       " 'relates': 8344,\n",
       " '1999it': 104785,\n",
       " 'moneymatrix': 126022,\n",
       " '7star': 206955,\n",
       " 'prague': 14819,\n",
       " 'polly': 8017,\n",
       " 'liasons': 57419,\n",
       " 'calledfor': 68139,\n",
       " 'inonly': 199602,\n",
       " 'milliagn': 163457,\n",
       " 'irnonicness': 215928,\n",
       " 'kurosawa': 8155,\n",
       " 'animationregarding': 202497,\n",
       " 'almost': 208,\n",
       " 'personathe': 198129,\n",
       " 'susannah': 14934,\n",
       " 'imbues': 25969,\n",
       " 'meateater': 205356,\n",
       " 'astral': 20629,\n",
       " 'nevksy': 180495,\n",
       " 'warnedat': 120659,\n",
       " 'passages': 10993,\n",
       " 'militias': 192025,\n",
       " 'adaptationgielgud': 186158,\n",
       " 'rugrats': 17839,\n",
       " 'entitled': 5278,\n",
       " 'bodieschris': 208658,\n",
       " 'timeswhos': 212035,\n",
       " 'highcontact': 104308,\n",
       " 'marjoke': 115377,\n",
       " 'furor': 210336,\n",
       " 'twooddly': 218430,\n",
       " 'slauther': 107406,\n",
       " 'asgrownup': 204823,\n",
       " 'cryface': 149330,\n",
       " 'hipswinging': 86504,\n",
       " 'phewi': 180759,\n",
       " 'moviecareer': 108735,\n",
       " 'roadchris': 213737,\n",
       " 'luces': 80531,\n",
       " 'shapeshifter': 71705,\n",
       " 'postpaul': 174572,\n",
       " 'allfruity': 89187,\n",
       " 'ooooozes': 165451,\n",
       " 'pepi': 32700,\n",
       " 'capsuleps': 190862,\n",
       " 'certainties': 74278,\n",
       " 'swardson': 77315,\n",
       " 'lolly': 58362,\n",
       " 'interlopers': 44104,\n",
       " 'viel': 32413,\n",
       " 'cullen': 27615,\n",
       " 'mouthlaura': 173902,\n",
       " 'copranger': 182575,\n",
       " 'legacya': 186860,\n",
       " 'harnett': 71693,\n",
       " 'followclocking': 126756,\n",
       " 'treads': 21823,\n",
       " 'musictvcat': 136590,\n",
       " 'reluctantunlikely': 97156,\n",
       " 'humourthis': 63289,\n",
       " 'daysoverall': 154047,\n",
       " 'thoughat': 59879,\n",
       " 'somemore': 67913,\n",
       " 'comedya': 32151,\n",
       " 'tellingthe': 67017,\n",
       " 'vengeanceseeking': 186409,\n",
       " 'idontneedrevengenormyhusbandsmoney': 143099,\n",
       " 'nearpoverty': 201544,\n",
       " 'steinem': 54433,\n",
       " 'oildrained': 169229,\n",
       " 'uruguayan': 45522,\n",
       " 'fretfully': 198666,\n",
       " 'alonzo': 26397,\n",
       " 'fakethe': 47065,\n",
       " 'exalted': 35924,\n",
       " 'scobs': 214346,\n",
       " 'barebreasted': 54568,\n",
       " 'wheatlry': 104222,\n",
       " 'tite': 173383,\n",
       " 'okdirectors': 182511,\n",
       " 'dewy': 46703,\n",
       " 'reformer': 36772,\n",
       " 'finchis': 156272,\n",
       " 'umekis': 53381,\n",
       " 'raf': 14889,\n",
       " 'etcjudy': 187905,\n",
       " 'greene': 8470,\n",
       " 'offthetop': 116995,\n",
       " 'easywith': 153227,\n",
       " 'personification': 22326,\n",
       " 'expensively': 46794,\n",
       " 'awaywhats': 110353,\n",
       " 'gypsy': 5564,\n",
       " 'spoiledthere': 178759,\n",
       " 'wongundergoes': 216441,\n",
       " '310little': 213741,\n",
       " 'againhe': 60784,\n",
       " 'officethe': 62942,\n",
       " 'carrerahudson': 199840,\n",
       " 'unrealisticsecondly': 125658,\n",
       " 'kellythis': 88628,\n",
       " 'empirical': 61797,\n",
       " 'prejudicesford': 106590,\n",
       " 'discontinuity': 72187,\n",
       " 'sceneryhonest': 157381,\n",
       " 'fadesbest': 96655,\n",
       " 'weakand': 66699,\n",
       " 'cubbyhouse': 166268,\n",
       " 'litja': 44813,\n",
       " 'chasing': 3163,\n",
       " 'mcjannet': 82640,\n",
       " 'spinnable': 80408,\n",
       " 'ozarks': 64054,\n",
       " 'againpaltrow': 147167,\n",
       " 'spotless': 26995,\n",
       " 'interjects': 44928,\n",
       " 'fabios': 139930,\n",
       " 'screwballs': 37573,\n",
       " 'sixthform': 62110,\n",
       " 'hard310': 163057,\n",
       " 'nocount': 43090,\n",
       " 'summerize': 70888,\n",
       " 'difrasso': 159678,\n",
       " 'orgasms': 45145,\n",
       " 'unreadiness': 152121,\n",
       " 'breaktension': 175153,\n",
       " 'arresting': 12593,\n",
       " 'nonprofit': 57713,\n",
       " 'handclawing': 71960,\n",
       " 'etcthere': 75198,\n",
       " 'amazedfrom': 207315,\n",
       " 'lemat': 208777,\n",
       " '1908': 51053,\n",
       " 'whiners': 45067,\n",
       " 'rentfree': 148579,\n",
       " 'soulfulness': 98595,\n",
       " 'roadsthose': 136307,\n",
       " 'denistouns': 112885,\n",
       " 'dannyher': 219911,\n",
       " 'edittake': 107416,\n",
       " 'invincibleto': 199528,\n",
       " 'sourceat': 202114,\n",
       " 'movieseen': 137781,\n",
       " 'awfulsounding': 190493,\n",
       " 'spaders': 212411,\n",
       " 'bloodgutsgorebonesnapping': 208581,\n",
       " 'ewan': 12379,\n",
       " 'mufla': 153654,\n",
       " 'equaledthe': 138772,\n",
       " 'doublethis': 195999,\n",
       " 'kaleidiscopic': 97853,\n",
       " 'shotin': 99417,\n",
       " 'tokers': 69562,\n",
       " 'essenceexistentialists': 108000,\n",
       " 'darwininan': 163798,\n",
       " 'angelinaif': 86031,\n",
       " 'doorbells': 69394,\n",
       " 'superficiality': 20574,\n",
       " 'tiffani': 26138,\n",
       " 'sharron': 60044,\n",
       " '3bs': 184107,\n",
       " 'vigilantestyle': 120416,\n",
       " 'tercerothird': 99027,\n",
       " 'edwardsmasterpiece': 191440,\n",
       " 'yatyuksu': 131243,\n",
       " 'greedily': 50115,\n",
       " 'chancewell': 109775,\n",
       " 'whupa': 117639,\n",
       " 'thismatthau': 101524,\n",
       " 'analysed': 63528,\n",
       " 'ratcher': 75123,\n",
       " 'aeclenching': 119301,\n",
       " 'affectts': 179261,\n",
       " 'selfdeprecatingbut': 108291,\n",
       " 'glassmajor': 145278,\n",
       " 'larquey': 163119,\n",
       " 'girlchild': 101327,\n",
       " 'pit': 5437,\n",
       " 'chekhov': 74194,\n",
       " 'effortthese': 170459,\n",
       " 'wordsin': 72050,\n",
       " 'louisedreyfussesque': 113567,\n",
       " 'squanderedif': 106705,\n",
       " 'reprint': 168030,\n",
       " 'charactersvery': 159221,\n",
       " 'ud': 71424,\n",
       " 'emotionlessness': 106448,\n",
       " 'forcegailard': 125168,\n",
       " 'nondiscriminatory': 162105,\n",
       " 'montagethe': 95260,\n",
       " 'vasectomy': 134667,\n",
       " 'q1': 100861,\n",
       " 'maryland': 22121,\n",
       " 'loosejointed': 220095,\n",
       " 'bilateral': 61795,\n",
       " 'movementnorma': 126060,\n",
       " 'rectangularwhich': 184583,\n",
       " ...}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.word_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = tokenizer.texts_to_sequences(clean_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X = sequence.pad_sequences(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1],\n",
       "       [0, 1],\n",
       "       [0, 1],\n",
       "       ..., \n",
       "       [1, 0],\n",
       "       [1, 0],\n",
       "       [1, 0]], dtype=uint8)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode the string labels. get_dummies orders its columns by sorted\n",
    "# label, so column 0 is 'negative' and column 1 is 'positive'.\n",
    "# The dtype is pinned explicitly: newer pandas releases emit bool dummies,\n",
    "# which would otherwise change the array handed to the network.\n",
    "sentiment_ohe = np.array(pd.get_dummies(sentiments), dtype=np.uint8)\n",
    "sentiment_ohe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((35000, 2087), (35000, 2), (15000, 2087), (15000, 2))"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The first n_train rows are used for fitting; every row after that is held out.\n",
    "n_train = 35000\n",
    "\n",
    "train_X, test_X = X[:n_train], X[n_train:]\n",
    "train_y, test_y = sentiment_ohe[:n_train], sentiment_ohe[n_train:]\n",
    "\n",
    "train_X.shape, train_y.shape, test_X.shape, test_y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_3 (Embedding)      (None, 2087, 128)         1280000   \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 2087, 128)         0         \n",
      "_________________________________________________________________\n",
      "lstm_2 (LSTM)                (None, 64)                49408     \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 2)                 130       \n",
      "=================================================================\n",
      "Total params: 1,329,538\n",
      "Trainable params: 1,329,538\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "# Network sizes. vocab_size must exceed every word index that appears in X;\n",
    "# presumably it matches the tokenizer's word limit -- TODO confirm.\n",
    "vocab_size = 10000\n",
    "embed_dim = 128\n",
    "lstm_out = 64\n",
    "\n",
    "# Embedding -> dropout -> single LSTM -> 2-way softmax matching the one-hot labels.\n",
    "model = Sequential()\n",
    "model.add(Embedding(vocab_size, embed_dim, input_length=X.shape[1]))\n",
    "model.add(Dropout(0.2))\n",
    "model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))\n",
    "model.add(Dense(2, activation='softmax'))\n",
    "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "\n",
    "# summary() prints the table itself and returns None, so wrapping it in print()\n",
    "# only appended a stray 'None' line to the output.\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/5\n",
      "35000/35000 [==============================] - 5115s - loss: 0.4314 - acc: 0.8086   \n",
      "Epoch 2/5\n",
      "35000/35000 [==============================] - 5328s - loss: 0.3293 - acc: 0.8661   \n",
      "Epoch 3/5\n",
      "35000/35000 [==============================] - 5458s - loss: 0.2708 - acc: 0.8923   \n",
      "Epoch 4/5\n",
      "35000/35000 [==============================] - 5855s - loss: 0.2266 - acc: 0.9128   \n",
      "Epoch 5/5\n",
      "35000/35000 [==============================] - 5681s - loss: 0.1936 - acc: 0.9257   \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x22bbe9760b8>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of samples per gradient update.\n",
    "batch_size = 100\n",
    "\n",
    "# Fit on the training split only; verbose=1 shows a progress bar for each epoch.\n",
    "model.fit(\n",
    "    x=train_X,\n",
    "    y=train_y,\n",
    "    batch_size=batch_size,\n",
    "    epochs=5,\n",
    "    verbose=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15000/15000 [==============================] - 503s   \n"
     ]
    }
   ],
   "source": [
    "# Sequential.predict_classes is deprecated and missing from newer Keras releases.\n",
    "# For a softmax output it is just the argmax over the class axis, so compute that\n",
    "# directly; verbose=1 keeps the progress bar predict_classes used to show.\n",
    "r = np.argmax(model.predict(test_X, verbose=1), axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 0, ..., 0, 0, 0], dtype=int64)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Recover the integer class index (position of the largest entry) of each one-hot row.\n",
    "a = test_y.argmax(axis=1)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             precision    recall  f1-score   support\n",
      "\n",
      "   Negative       0.86      0.90      0.88      7490\n",
      "   Positive       0.89      0.85      0.87      7510\n",
      "\n",
      "avg / total       0.87      0.87      0.87     15000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Display names for class indices 0 and 1, in that order.\n",
    "label_names = ['Negative', 'Positive']\n",
    "print(classification_report(y_true=a, y_pred=r, target_names=label_names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[6717,  773],\n",
       "       [1135, 6375]])"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows are the true classes, columns the predicted classes.\n",
    "confusion_matrix(y_true=a, y_pred=r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fact that it goes where other shows wouldn't dare. Forget pretty pictures painted for mainstream audiences, forget charm, forget romance...OZ doesn't mess around. The first episode I ever saw struck me as so nasty it was surreal, I couldn't say I was ready for it, but as I watched more, I developed a taste for Oz, and got accustomed to the high levels of graphic violence. Not just violence, but injustice (crooked guards who'll be sold out for a nickel, inmates who'll kill on order and get away with it, well mannered, middle class inmates being turned into prison bitches due to their lack of street skills or prison experience) Watching Oz, you may become comfortable with what is uncomfortable viewing....thats if you can get in touch with your darker side.\""
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviews[0]"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
